Loading the final_results.csv to R enivronment:
HMD_BestResults <- read.csv("final_results.csv")
str(HMD_BestResults)
## 'data.frame': 456 obs. of 4 variables:
## $ Country : chr "AUS" "AUS" "AUS" "AUS" ...
## $ Sex : chr "Female" "Female" "Female" "Female" ...
## $ variable: chr "preds" "preds_boost" "pred_LC_svd" "LCwithGAM" ...
## $ mse : num 0.0548 0.0478 0.0434 0.0756 0.0412 ...
Rename the variable column to Model and the mse to MSE. Rename preds to NN, preds_boost to NN_Boost, stacked to Stacked and pred_LC_svd to LC_SVD to have more clear names on the graphs and pivots.
colnames(HMD_BestResults)[3] <- "Model"
colnames(HMD_BestResults)[4] <- "MSE"
HMD_BestResults$Model[HMD_BestResults$Model == "preds"] <- "NN"
HMD_BestResults$Model[HMD_BestResults$Model == "preds_boost"] <- "NN_Boost"
HMD_BestResults$Model[HMD_BestResults$Model == "pred_LC_svd"] <- "LC_SVD"
HMD_BestResults$Model[HMD_BestResults$Model == "stacked"] <- "Stacked"
str(HMD_BestResults)
## 'data.frame': 456 obs. of 4 variables:
## $ Country: chr "AUS" "AUS" "AUS" "AUS" ...
## $ Sex : chr "Female" "Female" "Female" "Female" ...
## $ Model : chr "NN" "NN_Boost" "LC_SVD" "LCwithGAM" ...
## $ MSE : num 0.0548 0.0478 0.0434 0.0756 0.0412 ...
Load packages for analysis:
library(ggplot2)
library(tidyr)
library(stringr)
library(dplyr)
Define function to shape the HMD_BestResults to a one country - one row styled table.
myspread <- function(df, key, value) {
# quote key
keyq <- rlang::enquo(key)
# break value vector into quotes
valueq <- rlang::enquo(value)
s <- rlang::quos(!!valueq)
df %>% gather(variable, value, !!!s) %>%
unite(temp, !!keyq, variable) %>%
spread(temp, value)
}
Apply the function on HMD_BestResults and see the results:
HMD_BestResults <- HMD_BestResults %>% spread(Sex, MSE)
HMD_BestResults <- HMD_BestResults %>% myspread(Model, c(Female,Male))
HMD_BestResults <- HMD_BestResults[,c("Country",
colnames(HMD_BestResults)[str_detect(colnames(HMD_BestResults), "_Male")],
colnames(HMD_BestResults)[str_detect(colnames(HMD_BestResults), "_Female")])]
str(HMD_BestResults)
## 'data.frame': 38 obs. of 13 variables:
## $ Country : chr "AUS" "AUT" "BEL" "BGR" ...
## $ LC_SVD_Male : num 0.0612 0.1017 0.0937 0.054 0.2347 ...
## $ LCwithGAM_Male : num 0.1159 0.0588 0.0774 0.0807 0.2646 ...
## $ NN_Boost_Male : num 0.0771 0.1174 0.0841 0.0632 0.1469 ...
## $ NN_Male : num 0.0995 0.1585 0.1122 0.0691 0.175 ...
## $ RotatedLC_Male : num 0.0599 0.0558 0.073 0.2121 0.3199 ...
## $ Stacked_Male : num 0.0708 0.0717 0.0735 0.1061 0.2313 ...
## $ LC_SVD_Female : num 0.0434 0.0823 0.0648 0.0725 0.1175 ...
## $ LCwithGAM_Female: num 0.0756 0.0753 0.077 0.0776 0.281 ...
## $ NN_Boost_Female : num 0.0478 0.0914 0.0723 0.0768 0.1084 ...
## $ NN_Female : num 0.0548 0.1042 0.0789 0.0889 0.1227 ...
## $ RotatedLC_Female: num 0.0412 0.0795 0.066 0.1051 0.3553 ...
## $ Stacked_Female : num 0.0424 0.0751 0.0665 0.071 0.2327 ...
Get best model for females:
HMD_BestResults$BestFemaleMSE <- apply(HMD_BestResults[,8:13], 1, FUN = min)
HMD_BestResults$BestFemaleModel <- NA
for (i in 1:nrow(HMD_BestResults)) {
HMD_BestResults$BestFemaleModel[i] <- names(HMD_BestResults[,2:13])[which(
HMD_BestResults[,2:13] == HMD_BestResults$BestFemaleMSE[i],
arr.ind=T)[, "col"]]
}
str(HMD_BestResults)
## 'data.frame': 38 obs. of 15 variables:
## $ Country : chr "AUS" "AUT" "BEL" "BGR" ...
## $ LC_SVD_Male : num 0.0612 0.1017 0.0937 0.054 0.2347 ...
## $ LCwithGAM_Male : num 0.1159 0.0588 0.0774 0.0807 0.2646 ...
## $ NN_Boost_Male : num 0.0771 0.1174 0.0841 0.0632 0.1469 ...
## $ NN_Male : num 0.0995 0.1585 0.1122 0.0691 0.175 ...
## $ RotatedLC_Male : num 0.0599 0.0558 0.073 0.2121 0.3199 ...
## $ Stacked_Male : num 0.0708 0.0717 0.0735 0.1061 0.2313 ...
## $ LC_SVD_Female : num 0.0434 0.0823 0.0648 0.0725 0.1175 ...
## $ LCwithGAM_Female: num 0.0756 0.0753 0.077 0.0776 0.281 ...
## $ NN_Boost_Female : num 0.0478 0.0914 0.0723 0.0768 0.1084 ...
## $ NN_Female : num 0.0548 0.1042 0.0789 0.0889 0.1227 ...
## $ RotatedLC_Female: num 0.0412 0.0795 0.066 0.1051 0.3553 ...
## $ Stacked_Female : num 0.0424 0.0751 0.0665 0.071 0.2327 ...
## $ BestFemaleMSE : num 0.0412 0.0751 0.0648 0.071 0.1084 ...
## $ BestFemaleModel : chr "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...
Get best model for males:
HMD_BestResults$BestMaleMSE <- apply(HMD_BestResults[,2:7], 1, FUN = min)
HMD_BestResults$BestMaleModel <- NA
for (i in 1:nrow(HMD_BestResults)) {
HMD_BestResults$BestMaleModel[i] <- names(HMD_BestResults[,1:9])[which(
HMD_BestResults[,1:9] == HMD_BestResults$BestMaleMSE[i],
arr.ind=T)[, "col"]]
}
str(HMD_BestResults)
## 'data.frame': 38 obs. of 17 variables:
## $ Country : chr "AUS" "AUT" "BEL" "BGR" ...
## $ LC_SVD_Male : num 0.0612 0.1017 0.0937 0.054 0.2347 ...
## $ LCwithGAM_Male : num 0.1159 0.0588 0.0774 0.0807 0.2646 ...
## $ NN_Boost_Male : num 0.0771 0.1174 0.0841 0.0632 0.1469 ...
## $ NN_Male : num 0.0995 0.1585 0.1122 0.0691 0.175 ...
## $ RotatedLC_Male : num 0.0599 0.0558 0.073 0.2121 0.3199 ...
## $ Stacked_Male : num 0.0708 0.0717 0.0735 0.1061 0.2313 ...
## $ LC_SVD_Female : num 0.0434 0.0823 0.0648 0.0725 0.1175 ...
## $ LCwithGAM_Female: num 0.0756 0.0753 0.077 0.0776 0.281 ...
## $ NN_Boost_Female : num 0.0478 0.0914 0.0723 0.0768 0.1084 ...
## $ NN_Female : num 0.0548 0.1042 0.0789 0.0889 0.1227 ...
## $ RotatedLC_Female: num 0.0412 0.0795 0.066 0.1051 0.3553 ...
## $ Stacked_Female : num 0.0424 0.0751 0.0665 0.071 0.2327 ...
## $ BestFemaleMSE : num 0.0412 0.0751 0.0648 0.071 0.1084 ...
## $ BestFemaleModel : chr "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...
## $ BestMaleMSE : num 0.0599 0.0558 0.073 0.054 0.1469 ...
## $ BestMaleModel : chr "RotatedLC_Male" "RotatedLC_Male" "RotatedLC_Male" "LC_SVD_Male" ...
See the best model frequencies:
table(HMD_BestResults$BestFemaleModel)
##
## LC_SVD_Female LCwithGAM_Female NN_Boost_Female RotatedLC_Female
## 7 4 9 7
## Stacked_Female
## 11
table(HMD_BestResults$BestMaleModel)
##
## LC_SVD_Male LCwithGAM_Male NN_Boost_Male RotatedLC_Male Stacked_Male
## 3 14 7 9 5
Looks like NN_Boost and Stacked are the best option for females. And the LCwithGAM for males.
Let’s check the NN_Boost advantage to the simple NN solution in each country for both genders:
temp_df <- reshape2::melt(HMD_BestResults[,c("Country", "NN_Male", "NN_Boost_Male")],
id.var = "Country")
ggplot(temp_df, aes(x = Country, y = value, color = variable, group=variable)) +
geom_line()
temp_df <- reshape2::melt(HMD_BestResults[,c("Country", "NN_Female", "NN_Boost_Female")],
id.var = "Country")
ggplot(temp_df, aes(x = Country, y = value, color = variable, group=variable)) +
geom_line()
It seems that the \(MSE\) of NN_Boost is always better or about the same as the \(MSE\) of the simple NN.
So, next we generate a variable that measures the NN_boost advantage in \(MSE\) compared to the best not NN model.
HMD_BestResults$BestMaleMSE_notNN <- apply(HMD_BestResults[,c(2,3,6,7)], 1, FUN = min)
HMD_BestResults$NN_boost_vs_Best_notNN_male <- HMD_BestResults$NN_Boost_Male - HMD_BestResults$BestMaleMSE_notNN
HMD_BestResults$BestFemaleMSE_notNN <- apply(HMD_BestResults[,c(8,9,12,13)], 1, FUN = min)
HMD_BestResults$NN_boost_vs_Best_notNN_female <- HMD_BestResults$NN_Boost_Female - HMD_BestResults$BestFemaleMSE_notNN
str(HMD_BestResults)
## 'data.frame': 38 obs. of 21 variables:
## $ Country : chr "AUS" "AUT" "BEL" "BGR" ...
## $ LC_SVD_Male : num 0.0612 0.1017 0.0937 0.054 0.2347 ...
## $ LCwithGAM_Male : num 0.1159 0.0588 0.0774 0.0807 0.2646 ...
## $ NN_Boost_Male : num 0.0771 0.1174 0.0841 0.0632 0.1469 ...
## $ NN_Male : num 0.0995 0.1585 0.1122 0.0691 0.175 ...
## $ RotatedLC_Male : num 0.0599 0.0558 0.073 0.2121 0.3199 ...
## $ Stacked_Male : num 0.0708 0.0717 0.0735 0.1061 0.2313 ...
## $ LC_SVD_Female : num 0.0434 0.0823 0.0648 0.0725 0.1175 ...
## $ LCwithGAM_Female : num 0.0756 0.0753 0.077 0.0776 0.281 ...
## $ NN_Boost_Female : num 0.0478 0.0914 0.0723 0.0768 0.1084 ...
## $ NN_Female : num 0.0548 0.1042 0.0789 0.0889 0.1227 ...
## $ RotatedLC_Female : num 0.0412 0.0795 0.066 0.1051 0.3553 ...
## $ Stacked_Female : num 0.0424 0.0751 0.0665 0.071 0.2327 ...
## $ BestFemaleMSE : num 0.0412 0.0751 0.0648 0.071 0.1084 ...
## $ BestFemaleModel : chr "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...
## $ BestMaleMSE : num 0.0599 0.0558 0.073 0.054 0.1469 ...
## $ BestMaleModel : chr "RotatedLC_Male" "RotatedLC_Male" "RotatedLC_Male" "LC_SVD_Male" ...
## $ BestMaleMSE_notNN : num 0.0599 0.0558 0.073 0.054 0.2313 ...
## $ NN_boost_vs_Best_notNN_male : num 0.01715 0.06162 0.01114 0.00921 -0.08444 ...
## $ BestFemaleMSE_notNN : num 0.0412 0.0751 0.0648 0.071 0.1175 ...
## $ NN_boost_vs_Best_notNN_female: num 0.00667 0.01623 0.00754 0.00577 -0.00905 ...
See the best models in each country on the world map!
First, download the shapefile:
# Download the shapefile.
download.file("http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip" , destfile="GeoData/world_shape_file.zip")
# Unzip the file.
system("unzip GeoData/world_shape_file.zip")
Load the shapefile to R:
library(rgdal)
# Rread the shape file
world_spdf <- readOGR(
dsn= paste0(getwd(),"/GeoData") ,
layer="TM_WORLD_BORDERS_SIMPL-0.3",
verbose=FALSE
)
# Save the original order of countries so that we don't mess up the shapefile with the merge
world_spdf@data$OriginalOrder <- 1:nrow(world_spdf@data)
Cleaning the country codes in the HMD_BestResults dataframe:
# Creating the dataframe to join
HMD_ResultsToMap <- HMD_BestResults[,c("Country", "BestFemaleModel", "BestMaleModel",
"NN_boost_vs_Best_notNN_male", "NN_boost_vs_Best_notNN_female")]
# Unify some country codes
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "FRATNP"] <- "FRA"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "DEUTW"] <- "DEU"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "NZL_NP"] <- "NZL"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "GBR_NP"] <- "GBR"
Join with relevant variables from HMD_ResultsToMap:
ShapeDataTemp <- merge(x=world_spdf@data, y=HMD_ResultsToMap, by.x="ISO3", by.y="Country", all.x=TRUE)
# Clean up the categorical variables and convert them to factor
ShapeDataTemp$BestMaleModel[is.na(ShapeDataTemp$BestMaleModel)] <- "No_Data"
ShapeDataTemp$BestFemaleModel[is.na(ShapeDataTemp$BestFemaleModel)] <- "No_Data"
ShapeDataTemp$BestFemaleModel <- as.factor(ShapeDataTemp$BestFemaleModel)
ShapeDataTemp$BestMaleModel <- as.factor(ShapeDataTemp$BestMaleModel)
world_spdf@data <- ShapeDataTemp[order(ShapeDataTemp$OriginalOrder),]
Draw the map of the best models for females!
library(leaflet)
# Create a color palette for the map
mypalette <- colorFactor(c("green", "blue", "orange", "grey", "red", "yellow"),
world_spdf@data$BestFemaleModel)
# Prepare the text for tooltips
mytext <- paste(
"Country: ", world_spdf@data$NAME,"<br/>",
"Best Model Female: ", world_spdf@data$BestFemaleModel, "<br/>",
"NN_boost_vs_Best_notNN_female: ", round(world_spdf@data$NN_boost_vs_Best_notNN_female, 3),
sep="") %>%
lapply(htmltools::HTML)
# Create map object
m <- leaflet(world_spdf) %>%
addTiles() %>%
setView( lat=10, lng=0 , zoom=2) %>%
addPolygons(
fillColor = ~mypalette(BestFemaleModel),
stroke=FALSE,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addLegend(pal=mypalette, values=~BestFemaleModel, opacity=0.9, title = "Best Model for Females", position = "bottomleft")
# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="BestFemaleModel.html")
Draw the map of the best models for males!
library(leaflet)
# Create a color palette for the map
mypalette <- colorFactor(c("green", "blue", "orange", "grey", "red", "yellow"),
world_spdf@data$BestMaleModel)
# Prepare the text for tooltips
mytext <- paste(
"Country: ", world_spdf@data$NAME,"<br/>",
"Best Model Male: ", world_spdf@data$BestMaleModel, "<br/>",
"NN_boost_vs_Best_notNN_male: ", round(world_spdf@data$NN_boost_vs_Best_notNN_male, 3),
sep="") %>%
lapply(htmltools::HTML)
# Create map object
m <- leaflet(world_spdf) %>%
addTiles() %>%
setView( lat=10, lng=0 , zoom=2) %>%
addPolygons(
fillColor = ~mypalette(BestMaleModel),
stroke=FALSE,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addLegend(pal=mypalette, values=~BestMaleModel, opacity=0.9, title = "Best Model for Males", position = "bottomleft")
# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="BestMaleModel.html")
Draw the map of NN_boost advantage in \(MSE\) compared to the best not NN model for females!
library(leaflet)
# Set the differences on a logaritmic scale
world_spdf@data$Log_NN_boost_vs_Best_notNN_female <- log(world_spdf@data$NN_boost_vs_Best_notNN_female - (min(world_spdf@data$NN_boost_vs_Best_notNN_female, na.rm = TRUE)-0.09))
# Create a color palette for the map
mypalette <- colorNumeric(palette = "RdYlGn",
domain = world_spdf@data$Log_NN_boost_vs_Best_notNN_female,
na.color="transparent",
reverse = TRUE)
# Prepare the text for tooltips
mytext <- paste(
"Country: ", world_spdf@data$NAME,"<br/>",
"Best Model Female: ", world_spdf@data$BestMaleFemodel, "<br/>",
"Log_NN_boost_vs_Best_notNN_female: ", round(world_spdf@data$Log_NN_boost_vs_Best_notNN_female, 3),
sep="") %>%
lapply(htmltools::HTML)
# Create map object
m <- leaflet(world_spdf) %>%
addTiles() %>%
setView( lat=10, lng=0 , zoom=2) %>%
addPolygons(
fillColor = ~mypalette(Log_NN_boost_vs_Best_notNN_female),
stroke=TRUE,
fillOpacity = 0.9,
color="white",
weight=0.3,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addLegend(pal=mypalette, values=~Log_NN_boost_vs_Best_notNN_female, opacity=0.9, title = "Log_NN_boost advantage in MSE - Female", position = "bottomleft")
# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="NNboostAdvantageFemale.html")
Draw the map of NN_boost advantage in \(MSE\) compared to the best not NN model for males!
library(leaflet)
# Set the differences on a logaritmic scale
world_spdf@data$Log_NN_boost_vs_Best_notNN_male <- log(world_spdf@data$NN_boost_vs_Best_notNN_male - (min(world_spdf@data$NN_boost_vs_Best_notNN_male, na.rm = TRUE)-0.09))
# Create a color palette for the map
mypalette <- colorNumeric(palette = "RdYlGn",
domain = world_spdf@data$Log_NN_boost_vs_Best_notNN_male,
na.color="transparent",
reverse = TRUE)
# Prepare the text for tooltips
mytext <- paste(
"Country: ", world_spdf@data$NAME,"<br/>",
"Best Model male: ", world_spdf@data$BestMaleFemodel, "<br/>",
"Log_NN_boost_vs_Best_notNN_male: ", round(world_spdf@data$Log_NN_boost_vs_Best_notNN_male, 3),
sep="") %>%
lapply(htmltools::HTML)
# Create map object
m <- leaflet(world_spdf) %>%
addTiles() %>%
setView( lat=10, lng=0 , zoom=2) %>%
addPolygons(
fillColor = ~mypalette(Log_NN_boost_vs_Best_notNN_male),
stroke=TRUE,
fillOpacity = 0.9,
color="white",
weight=0.3,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addLegend(pal=mypalette, values=~Log_NN_boost_vs_Best_notNN_male, opacity=0.9, title = "Log_NN_boost advantage in MSE - male", position = "bottomleft")
# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="NNboostAdvantageMale.html")
Load packages for the spatial autocorrelation analysis:
library(sf)
library(spdep)
Load the shapefile of world countries with the sf package:
s <- st_read( "GeoData/TM_WORLD_BORDERS_SIMPL-0.3.shp")
## Reading layer `TM_WORLD_BORDERS_SIMPL-0.3' from data source
## `E:\Egyetem Phd\8. félév\LongevityRisk+NN\GeoData\TM_WORLD_BORDERS_SIMPL-0.3.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.57027
## Geodetic CRS: WGS 84
Add the NN_boost_vs_Best_notNN_male and NN_boost_vs_Best_notNN_female variables to the s shape data.
ShapeDataTemp <- merge(x=s, y=HMD_ResultsToMap[,c("Country",
"NN_boost_vs_Best_notNN_male",
"NN_boost_vs_Best_notNN_female")],
by.x="ISO3", by.y="Country", all.x=TRUE)
s <- ShapeDataTemp
str(s)
## Classes 'sf' and 'data.frame': 246 obs. of 14 variables:
## $ ISO3 : chr "ABW" "AFG" "AGO" "AIA" ...
## $ FIPS : chr "AA" "AF" "AO" "AV" ...
## $ ISO2 : chr "AW" "AF" "AO" "AI" ...
## $ UN : int 533 4 24 660 248 8 20 530 784 32 ...
## $ NAME : chr "Aruba" "Afghanistan" "Angola" "Anguilla" ...
## $ AREA : int 0 65209 124670 0 0 2740 0 80 8360 273669 ...
## $ POP2005 : num 102897 25067407 16095214 12256 0 ...
## $ REGION : int 19 142 2 19 150 150 150 19 142 19 ...
## $ SUBREGION : int 29 34 17 29 154 39 39 29 145 5 ...
## $ LON : num -70 65.2 17.5 -63 20 ...
## $ LAT : num 12.5 33.7 -12.3 18.2 60.2 ...
## $ NN_boost_vs_Best_notNN_male : num NA NA NA NA NA NA NA NA NA NA ...
## $ NN_boost_vs_Best_notNN_female: num NA NA NA NA NA NA NA NA NA NA ...
## $ geometry :sfc_MULTIPOLYGON of length 246; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:4, 1:2] -69.9 -70.1 -70.1 -69.9 12.4 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:13] "ISO3" "FIPS" "ISO2" "UN" ...
Ok, we have the two new variables.
Let’s define “neighboring” polygons.
sf::sf_use_s2(FALSE)
#s <- na.omit(s)
nb <- poly2nb(s, queen=TRUE)
Each neighboring polygon will be assigned equal weight when computing the neighboring NN advantages in \(MSE\).
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
Computing the Moran’s \(I\) statistic for Females.
NN_boost_vs_Best_notNN_female_Lag <- lag.listw(lw, s$NN_boost_vs_Best_notNN_female,
zero.policy = FALSE, NAOK = TRUE)
plot(NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female, pch=16, asp=1)
MoranOLS <- lm(NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female)
abline(MoranOLS, col="blue")
summary(MoranOLS)
##
## Call:
## lm(formula = NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.037878 -0.017439 -0.009258 0.006185 0.140546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01060 0.01001 1.059 0.305
## s$NN_boost_vs_Best_notNN_female 0.51415 0.20085 2.560 0.021 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04015 on 16 degrees of freedom
## (228 observations deleted due to missingness)
## Multiple R-squared: 0.2906, Adjusted R-squared: 0.2462
## F-statistic: 6.553 on 1 and 16 DF, p-value: 0.02098
Ok, we have some moderate spatial autocorrelation (\(0.51\)) for females. The autpcorrelation is significant at \(\alpha=0.05\), but not at \(\alpha=0.01\) level.
Computing the Moran’s \(I\) statistic for Males.
NN_boost_vs_Best_notNN_male_Lag <- lag.listw(lw, s$NN_boost_vs_Best_notNN_male,
zero.policy = FALSE, NAOK = TRUE)
plot(NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male, pch=16, asp=1)
MoranOLS <- lm(NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male)
abline(MoranOLS, col="blue")
summary(MoranOLS)
##
## Call:
## lm(formula = NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07614 -0.01560 -0.00134 0.01225 0.11091
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.007235 0.010088 0.717 0.4836
## s$NN_boost_vs_Best_notNN_male 0.516516 0.180900 2.855 0.0115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03816 on 16 degrees of freedom
## (228 observations deleted due to missingness)
## Multiple R-squared: 0.3375, Adjusted R-squared: 0.2961
## F-statistic: 8.153 on 1 and 16 DF, p-value: 0.01146
Ok, we have some moderate spatial autocorrelation (\(0.52\)) for males. The autpcorrelation is significant at \(\alpha=0.05\), but not at \(\alpha=0.01\) level.
The level of spatial autocorrelation is similar for males and for females, but for males it is seems that some outlier effects are more responsible.